Bus Network Graph: Experiments on real data¶


In [ ]:
from helper import *
from network.shortest_paths import *
from elements import Stop, Variant, Path
from queries import StopQuery, VariantQuery, PathQuery
from network.shortest_paths import NetworkAnalysisBetweenness
from network.bus import BusNetwork, BusNetworkDijkstra

from ipyleaflet import Map, GeoJSON
import pandas as pd
import itertools as iters
import random

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from tqdm import tqdm

import timeit

import plotly.io as pio
pio.renderers.default = "jupyterlab"
In [ ]:
net_analysis = NetworkAnalysisBetweenness()
dijkstra = BusNetworkDijkstra()
net = BusNetwork.from_ndjsons(sides_set_type='spatial')
sides_set_type =  spatial
100%|██████████| 297/297 [00:10<00:00, 29.66it/s]
In [ ]:
stops = StopQuery.from_ndjson()
vars = VariantQuery.from_ndjson()

Network Construction¶

Degree distribution of Stops¶

In [ ]:
def mod_row(row):
    """Collapse rare StopType values into a single 'Others' category."""
    rare_types = {
        'Ô sơn',
        'Biển treo',
        'Trạm tạm',
        'Bến bãi QH chung QH',
        'Ga Metro Số 2',
        'Bến Bãi QH 568'
    }
    if row['StopType'] in rare_types:
        row['StopType'] = 'Others'
    return row

# Node table for the network, with rare stop types bucketed into 'Others'.
node_records = [stop.to_dict() for stop in net.nodes.values()]
df_nodes = pd.DataFrame(node_records).apply(mod_row, axis=1)

# 'Routes' is a whitespace-separated list of route numbers; its length is
# the stop's degree (number of routes passing through).
df_nodes['Degree'] = df_nodes['Routes'].apply(lambda routes: len(routes.split()))
df_nodes = df_nodes[['StopId', 'StopType', 'Degree']]
df_nodes
Out[ ]:
StopId StopType Degree
0 35 Bến xe 26
1 7276 Trụ dừng 21
2 7277 Trụ dừng 19
3 7278 Nhà chờ 14
4 7265 Nhà chờ 5
... ... ... ...
4392 7682 Others 1
4393 7683 Others 1
4394 7684 Others 1
4395 7685 Others 1
4396 7686 Others 1

4397 rows × 3 columns

In [ ]:
# Degree histogram with an open-ended "15+" bucket.
bins = [*range(15), 15, 50]
# observed=False keeps empty Interval categories so every bin gets a bar,
# and also silences the pandas FutureWarning about the changing default.
# .size() replaces the slower/deprecated-looking .apply(len).
lens = df_nodes.groupby(pd.cut(df_nodes['Degree'], bins=bins), observed=False).size()

bins[-1] = '15+'
fig = px.bar(y=bins[1:], x=lens, height=800, width=1400, log_x=True, text_auto=True, color=lens, orientation='h')
fig.update_yaxes(type='category')

fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(
    title="Degree distribution of bus stops",
    xaxis_title="Count",
    yaxis_title="Degree (# of routes passing)",
    font=dict(
        size=18,
    )
)

fig.show(renderer='notebook')
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_25672\1233675535.py:2: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

In [ ]:
bins = [*range(15), 15, 50]

# Count stops per (StopType, degree-bin). Grouping on both keys at once
# replaces the nested groupby().apply() pattern, which (a) accidentally cut
# the FULL df_nodes['Degree'] inside each group instead of the group's own
# column, and (b) raised the FutureWarning/DeprecationWarning seen in the
# previous run. observed=False keeps zero-count bins, matching the old
# default behavior.
lens_obj = df_nodes.groupby(
    ['StopType', pd.cut(df_nodes['Degree'], bins=bins)],
    observed=False
).size().to_dict()

lens_obj

def get_label(label):
    """Render a degree Interval as its right edge, capping at '15+'."""
    return '15+' if label.right > 15 else str(label.right)

# Flatten the (StopType, bin) -> count mapping into a tidy frame for faceting.
rows = []
for (stop_type, label), count in lens_obj.items():
    rows.append({
        'StopType': stop_type,
        'Label': get_label(label),
        'Count': count
    })
lens_df = pd.DataFrame(rows)
lens_df

fig = px.bar(
    lens_df, x='Count', y='Label', facet_col='StopType', facet_col_wrap=2,
    height=1350, width=1800, color='Count',
    log_x=True, text_auto=True, orientation='h'
)
fig.update_yaxes(title="Degree (# of routes passing)", type='category')
fig.update_xaxes(title="Count")

fig.update_layout(
    title="Degree distribution",
    font=dict(
        size=16,
    )
)

fig.show(renderer='notebook')
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_31452\1622963036.py:5: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

C:\Users\ADMIN\AppData\Local\Temp\ipykernel_31452\1622963036.py:2: DeprecationWarning:

DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.

StopType Spatial Distribution¶

In [ ]:
def mod_row(row):
    """Merge infrequent StopType values: the rarest kinds become 'Others',
    and the two sign-only kinds are merged under one combined label."""
    stop_type = row['StopType']

    if stop_type in (
        'Trạm tạm',
        'Bến bãi QH chung QH',
        'Ga Metro Số 2',
        'Bến Bãi QH 568'
    ):
        row['StopType'] = 'Others'
    elif stop_type in ('Ô sơn', 'Biển treo'):
        # elif is equivalent to the original second if: 'Others' never
        # matched that list anyway.
        row['StopType'] = 'Ô sơn / Biển treo'

    return row

# Full stop table (all stops, not just networked ones) with merged stop types.
type_stops_df = pd.DataFrame(
    stops[stop_id].to_dict()
    for stop_id in stops.ids
)
geo_cols = ['StopId', 'Code', 'Name', 'StopType', 'Zone', 'Street', 'Lng', 'Lat']
type_stops_df = type_stops_df[geo_cols].apply(mod_row, axis=1)
type_stops_df
Out[ ]:
StopId Code Name StopType Zone Street Lng Lat
0 35 BX 01 Bến xe buýt Sài Gòn Bến xe Quận 1 Lê Lai 106.689362 10.767676
1 7276 Q1 190 Tôn Thất Tùng Trụ dừng Quận 1 Phạm Ngũ Lão 106.690941 10.767670
2 7277 Q1 191 Nguyễn Thị Nghĩa Trụ dừng Quận 1 Phạm Ngũ Lão 106.693699 10.768788
3 7278 Q1 192 Trường Emst Thalmann Nhà chờ Quận 1 Phạm Ngũ Lão 106.695587 10.769504
4 7265 Q1 179 Trạm Trung chuyển trên đường Hàm Nghi Nhà chờ Quận 1 Hàm Nghi 106.700259 10.770911
... ... ... ... ... ... ... ... ...
4392 7682 SWB1_1 Ga tàu thuỷ Bạch Đằng Others Quận 1 Tôn Đức Thắng 106.706901 10.775264
4393 7683 SWB1_4 Ga tàu thuỷ Bình An Others Quận 2 Đường số 21 106.728734 10.797349
4394 7684 SWB1_8 Ga tàu thuỷ Thanh Đa Others Quận Bình Thạnh Cư xá Thanh Đa 106.715757 10.819385
4395 7685 SWB1_10 Ga tàu thuỷ Hiệp Bình Chánh Others Quận Thủ Đức Đường số 10 106.731475 10.827231
4396 7686 SWB1_11 Ga tàu thuỷ Linh Đông Others Quận Thủ Đức Đường số 36 106.746254 10.835287

4397 rows × 8 columns

In [ ]:
# StopType is categorical, so plotly uses a discrete color sequence here;
# the range_color=[0, 2000000] carried over from the Score map was
# meaningless for this plot and has been removed.
fig = px.scatter_geo(
    type_stops_df, lat='Lat', lon='Lng', color='StopType', width=1400, height=1000,
    hover_data=['StopId', 'Code', 'Name', 'Zone', 'StopType']
)
fig.update_layout(
    title='Spatial distribution of StopType',
    font=dict(
        size=17
    ),
    geo=dict(
        scope = 'asia',
        resolution = 110,
        lonaxis_range= [106.4, 106.9],
        lataxis_range= [10.6, 11],
        landcolor = 'rgb(240, 240, 240)',
    )
)
fig.show(renderer='notebook')

Relationship between Route distance and running time¶

In [ ]:
# One row per route variant.
df_vars = pd.DataFrame([variant.to_dict() for variant in vars._objs.values()])

df_vars['RouteVarName'] = df_vars['StartStop'] + ' - ' + df_vars['EndStop']
# 0.06 = 60 / 1000 — presumably Distance is in metres and RunningTime in
# minutes, giving Speed in km/h; TODO confirm against the data source.
df_vars['Speed'] = df_vars['Distance'] / df_vars['RunningTime'] * 0.06
df_vars['RouteId'] = df_vars['RouteIds'].apply(lambda pair: pair[0])
df_vars['RouteVarId'] = df_vars['RouteIds'].apply(lambda pair: pair[1])
df_vars = df_vars[['RouteId', 'RouteVarId', 'RouteNo', 'RouteVarName', 'Distance', 'RunningTime', 'Speed']]
In [ ]:
# px.histogram(filtered_vars_df['Distance'], nbins=100)
# Distance vs. running time per variant, colored by the implied speed.
fig = px.scatter(
    df_vars, x='RunningTime', y='Distance', marginal_x='box', marginal_y='box',
    width=1600, height=900,
    trendline='ols', trendline_options={"add_constant": False},
    hover_data=['RouteVarName', 'RouteId', 'RouteVarId'],
    color='Speed', color_continuous_scale=px.colors.sequential.Agsunset, range_color=[5, 40]
)
fig.update_layout(
    title="Relationship between Distance and RunningTime",
    font=dict(
        size=20,
    )
)
fig.show(renderer='notebook')

Running time of construction algorithms¶

In [ ]:
def run_gen(sides_set_type: str):
    """Build the bus network once with the given side-set strategy (result discarded)."""
    BusNetwork.from_ndjsons(sides_set_type=sides_set_type)

NO_RUNS = 5

times = {}
for mode in ['default', 'spatial']:
    print('Running mode', mode)
    # Pass a callable instead of a source string + globals(): `mode` is
    # bound explicitly via the default argument, not looked up globally
    # when the string is eventually executed.
    times[mode] = [
        timeit.timeit(lambda mode=mode: run_gen(sides_set_type=mode), number=1)
        for _ in range(NO_RUNS)
    ]

times
Running mode default
sides_set_type =  default
100%|██████████| 297/297 [00:42<00:00,  6.92it/s]
sides_set_type =  default
100%|██████████| 297/297 [00:42<00:00,  7.00it/s]
sides_set_type =  default
100%|██████████| 297/297 [00:39<00:00,  7.54it/s]
sides_set_type =  default
100%|██████████| 297/297 [00:39<00:00,  7.60it/s]
sides_set_type =  default
100%|██████████| 297/297 [00:38<00:00,  7.71it/s]
Running mode spatial
sides_set_type =  spatial
100%|██████████| 297/297 [00:07<00:00, 38.92it/s]
sides_set_type =  spatial
100%|██████████| 297/297 [00:07<00:00, 38.38it/s]
sides_set_type =  spatial
100%|██████████| 297/297 [00:07<00:00, 38.87it/s] 
sides_set_type =  spatial
100%|██████████| 297/297 [00:07<00:00, 39.59it/s]
sides_set_type =  spatial
100%|██████████| 297/297 [00:07<00:00, 39.40it/s]
Out[ ]:
{'default': [43.38411490013823,
  42.890710399951786,
  39.85574440006167,
  39.51483920007013,
  38.97113410010934],
 'spatial': [8.083511600038037,
  8.161453299922869,
  8.059146600076929,
  7.91556299990043,
  7.955796599853784]}
In [ ]:
# Per-run timings plus derived throughput (iterations/second) per mode.
run_labels = ['Run #{}'.format(i) for i in range(1, NO_RUNS + 1)]
df_times = pd.DataFrame(times, index=run_labels)

NO_ITERS = 297  # number of route variants processed per construction run
for mode in ['default', 'spatial']:
    df_times[mode + '_it_per_sec'] = NO_ITERS / df_times[mode]

df_times = df_times[['default', 'default_it_per_sec', 'spatial', 'spatial_it_per_sec']]
df_times
Out[ ]:
default default_it_per_sec spatial spatial_it_per_sec
Run #1 43.384115 6.845824 8.083512 36.741458
Run #2 42.890710 6.924576 8.161453 36.390578
Run #3 39.855744 7.451874 8.059147 36.852537
Run #4 39.514839 7.516164 7.915563 37.521020
Run #5 38.971134 7.621025 7.955797 37.331271
In [ ]:
# Column-wise average over runs; .mean() replaces the manual sum()/len().
fin = df_times.mean()
fin
Out[ ]:
default               40.923309
default_it_per_sec     7.271893
spatial                8.035094
spatial_it_per_sec    36.967373
dtype: float64
In [ ]:
# Speed-up factor of the spatial construction over the default one (~5x).
fin['default'] / fin['spatial']
Out[ ]:
5.0930714039938225

Network Analysis¶

Top $k$ most influential Stops¶

In [ ]:
# Compute betweenness scores over all stops using the 'tree' algorithm.
net_analysis.from_net(net=net, dijkstra_engine=dijkstra, alg='tree')
100%|██████████| 4397/4397 [02:26<00:00, 29.94it/s]
In [ ]:
def print_top(k = 10):
    """Print the k highest-betweenness stops with their scores.

    Bug fix: the limit passed to top_scores() was hard-coded to 10, so any
    k other than 10 printed a mismatched number of rows.
    """
    print('[Top k = {} most influential Stops]'.format(k))
    top_stops_id = net_analysis.top_scores(k)
    for stop_id in top_stops_id:
        print(stop_id, net_analysis.scores[stop_id])

print_top()
[Top k = 10 most influential Stops]
268 2625614
267 2625614
1239 2614406
1115 2613916
1393 2607219
1152 2604321
270 2311589
271 2310622
174 2294866
510 2261665
In [ ]:
# Rank every stop by betweenness; len(net.nodes) replaces the magic number
# 4397 so the cell stays correct if the network size changes.
im_stops_df = pd.DataFrame(
    {
        **stops[stop_id].to_dict(),
        "Score": net_analysis.scores[stop_id]
    }
    for stop_id in net_analysis.top_scores(len(net.nodes))
)
im_stops_df = im_stops_df[['StopId', 'Code', 'Name', 'StopType', 'Zone', 'Street', 'Lng', 'Lat', 'Score']].sort_values(by='Score')
In [ ]:
# Betweenness scores on the map; a fixed color range keeps plots comparable.
fig = px.scatter_geo(
    im_stops_df, lat='Lat', lon='Lng', color='Score', width=1400, height=1000,
    color_continuous_scale='rdpu', range_color=[0, 2000000],
    hover_data=['StopId', 'Code', 'Name', 'Zone', 'Score']
)
fig.update_layout(
    title='Spatial distribution of Betweenness score',
    font=dict(size=17),
    geo=dict(
        scope='asia',
        resolution=110,
        lonaxis_range=[106.4, 106.9],
        lataxis_range=[10.6, 11],
        landcolor='rgb(240, 240, 240)',
    )
)
fig.show(renderer='notebook')

Table of Top $k=10$ Stops¶

In [ ]:
# Table of the default top-k (10) stops by betweenness.
stop_list = [
    {
        **stops[stop_id].to_dict(),
        "Score": net_analysis.scores[stop_id]
    }
    for stop_id in net_analysis.top_scores()
]

# Index 1..10 matches the ranking position shown in the table.
df = pd.DataFrame(stop_list, index=pd.RangeIndex(1, 11))
df['Location'] = df['Street'] + ', ' + df['Zone']

# Dead transform removed: df['Routes'] used to be rewritten to a route
# count but was never part of the selected columns below.
df = df[['StopId', 'Name', 'Location', 'Street', 'Zone', 'StopType', 'Score']]

df
Out[ ]:
StopId Name Location Street Zone StopType Score
1 268 Mũi tàu Cộng Hòa Trường Chinh, Quận Tân Bình Trường Chinh Quận Tân Bình Trụ dừng 2625614
2 267 Ngã ba Chế Lan Viên Trường Chinh, Quận Tân Bình Trường Chinh Quận Tân Bình Nhà chờ 2625614
3 1239 Bến xe An Sương Quốc lộ 22, Huyện Hóc Môn Quốc lộ 22 Huyện Hóc Môn Trụ dừng 2614406
4 1115 Bến xe An Sương Quốc lộ 22, Quận 12 Quốc lộ 22 Quận 12 Trụ dừng 2613916
5 1393 Ngã tư Trung Chánh Quốc lộ 22, Huyện Hóc Môn Quốc lộ 22 Huyện Hóc Môn Nhà chờ 2607219
6 1152 Trung tâm Văn hóa Quận 12 Quốc lộ 22, Quận 12 Quốc lộ 22 Quận 12 Nhà chờ 2604321
7 270 UBND Phường 15 Trường Chinh, Quận Tân Bình Trường Chinh Quận Tân Bình Nhà chờ 2311589
8 271 Khu Công Nghiệp Tân Bình Trường Chinh, Quận Tân Bình Trường Chinh Quận Tân Bình Nhà chờ 2310622
9 174 Trạm Dệt Thành Công Trường Chinh, Quận Tân Phú Trường Chinh Quận Tân Phú Nhà chờ 2294866
10 510 Bệnh viện Quận Tân Bình Hoàng Văn Thụ, Quận Tân Bình Hoàng Văn Thụ Quận Tân Bình Nhà chờ 2261665

# of edge candidates $|E|$ per Stop¶

In [ ]:
def get_df(mode):
    """Summarise per-stop edge-candidate counts for one side-set strategy.

    For each (RouteId, RouteVarId) the analyser yields the path length and
    the per-stop candidate counts; describe() expands the latter into
    count/mean/std/quantile columns.
    """
    records = (
        {
            "RouteId": route_id,
            "RouteVarId": route_var_id,
            "PathLength": path_len,
            **pd.Series(per_stop).describe()
        }
        for (route_id, route_var_id), path_len, per_stop
        in BusNetwork._analyse_sides_set(sides_set_type=mode)
    )
    return pd.DataFrame(records)
In [ ]:
# Run the side-set analysis once per strategy and tag the rows by Type.
df_spatial = get_df('spatial').assign(Type='spatial')

df_default = get_df('default').assign(Type='default')
sides_set_type =  spatial
100%|██████████| 297/297 [00:06<00:00, 49.05it/s] 
sides_set_type =  default
100%|██████████| 297/297 [00:00<00:00, 603.26it/s]
In [ ]:
# Stack both runs; reset_index(drop=True) replaces the old
# reset_index().drop(columns='index') round-trip.
df_all = pd.concat([df_default, df_spatial]).reset_index(drop=True)
df_all
Out[ ]:
RouteId RouteVarId PathLength count mean std min 25% 50% 75% max Type
0 3 5 483 53.0 483.0 0.000000 483.0 483.00 483.0 483.00 483.0 default
1 7 14 222 50.0 222.0 0.000000 222.0 222.00 222.0 222.00 222.0 default
2 1 1 249 29.0 249.0 0.000000 249.0 249.00 249.0 249.00 249.0 default
3 3 6 327 56.0 327.0 0.000000 327.0 327.00 327.0 327.00 327.0 default
4 7 13 235 44.0 235.0 0.000000 235.0 235.00 235.0 235.00 235.0 default
... ... ... ... ... ... ... ... ... ... ... ... ...
589 313 1 99 2.0 2.5 0.707107 2.0 2.25 2.5 2.75 3.0 spatial
590 314 2 73 2.0 1.5 0.707107 1.0 1.25 1.5 1.75 2.0 spatial
591 313 2 30 2.0 1.5 0.707107 1.0 1.25 1.5 1.75 2.0 spatial
592 337 1 54 5.0 6.8 3.898718 3.0 4.00 5.0 11.00 11.0 spatial
593 337 2 52 5.0 7.2 3.271085 4.0 4.00 7.0 10.00 11.0 spatial

594 rows × 12 columns

In [ ]:
# Spread of candidate-edge counts per stop under the default strategy.
df_default['mean'].describe()
Out[ ]:
count    297.000000
mean     247.265993
std      155.000806
min        6.000000
25%      154.000000
50%      240.000000
75%      333.000000
max      781.000000
Name: mean, dtype: float64
In [ ]:
# Spread of candidate-edge counts per stop under the spatial strategy.
df_spatial['mean'].describe()
Out[ ]:
count    297.000000
mean       6.320350
std        2.569525
min        1.000000
25%        4.650000
50%        6.275000
75%        7.660000
max       14.040000
Name: mean, dtype: float64
In [ ]:
fig = px.scatter(
    df_all, x='PathLength', y='count', marginal_x='box', marginal_y='box', width=1600, height=900, 
    trendline='ols', trendline_options={"add_constant": False},
    hover_data=['RouteId', 'RouteVarId', 'count', 'mean'],
    color='Type'
)
fig.show(renderer='notebook')
In [ ]:
fig = px.scatter(
    df_all, x='PathLength', y='mean', marginal_x='box', marginal_y='box', log_y=True, width=1600, height=900, 
    trendline='ols', trendline_options={"add_constant": True},
    hover_data=['RouteId', 'RouteVarId', 'count', 'mean'],
    color='Type'
)

fig.update_xaxes(title="# of Path sides")
fig.update_yaxes(title="# of candidate edges |E|")

fig.update_layout(
    font=dict(
        size=20,
    )
)
fig.show(renderer='notebook')

Running time between Betweenness Analysis algorithms¶

In [ ]:
def run_analysis(alg: str):
    """Run one full betweenness analysis with the given algorithm (result discarded)."""
    _net_analysis = NetworkAnalysisBetweenness()
    _net_analysis.from_net(net=net, dijkstra_engine=None, alg=alg)

NO_RUNS = 5

times = {}
for mode in ['default', 'tree']:
    print('Running mode', mode)
    # Pass a callable instead of a source string + globals(): `mode` is
    # bound explicitly via the default argument.
    times[mode] = [
        timeit.timeit(lambda mode=mode: run_analysis(alg=mode), number=1)
        for _ in range(NO_RUNS)
    ]

times
Running mode default
100%|██████████| 4397/4397 [09:28<00:00,  7.74it/s]
100%|██████████| 4397/4397 [08:54<00:00,  8.22it/s]
100%|██████████| 4397/4397 [07:40<00:00,  9.55it/s]
100%|██████████| 4397/4397 [07:30<00:00,  9.76it/s]
100%|██████████| 4397/4397 [07:16<00:00, 10.06it/s]
Running mode tree
100%|██████████| 4397/4397 [02:28<00:00, 29.56it/s]
100%|██████████| 4397/4397 [02:21<00:00, 31.05it/s]
100%|██████████| 4397/4397 [02:23<00:00, 30.59it/s]
100%|██████████| 4397/4397 [02:17<00:00, 32.01it/s]
100%|██████████| 4397/4397 [02:17<00:00, 31.92it/s]
Out[ ]:
{'default': [568.2074843998998,
  534.7812418001704,
  460.3806853001006,
  450.74478500010446,
  436.9792862001341],
 'tree': [148.74347339989617,
  141.63267030008137,
  143.76318740006536,
  137.3843427998945,
  137.74046280002221]}
In [ ]:
# Per-run timings plus derived throughput (iterations/second) per algorithm.
run_labels = ['Run #{}'.format(i) for i in range(1, NO_RUNS + 1)]
df_times = pd.DataFrame(times, index=run_labels)

NO_ITERS = 4397  # number of stops processed per analysis run
for mode in ['default', 'tree']:
    df_times[mode + '_it_per_sec'] = NO_ITERS / df_times[mode]

df_times = df_times[['default', 'default_it_per_sec', 'tree', 'tree_it_per_sec']]
df_times
Out[ ]:
default default_it_per_sec tree tree_it_per_sec
Run #1 568.207484 7.738370 148.743473 29.560961
Run #2 534.781242 8.222054 141.632670 31.045097
Run #3 460.380685 9.550792 143.763187 30.585020
Run #4 450.744785 9.754966 137.384343 32.005103
Run #5 436.979286 10.062262 137.740463 31.922355
In [ ]:
# Column-wise average over runs; .mean() replaces the manual sum()/len().
fin = df_times.mean()
fin
Out[ ]:
default               490.218697
default_it_per_sec      9.065689
tree                  141.852827
tree_it_per_sec        31.023707
dtype: float64
In [ ]:
# Ratio of mean runtimes as a percentage: 'tree' is ~3.5x faster than 'default'.
print('Improvement: {}%'.format((fin['default'] / fin['tree'] * 100).round(2)))
Improvement: 345.58%

Search-space efficient shortest path algorithms¶

In [ ]:
from importlib import reload  
# reload(NetworkDijkstraSingleDestination)
In [ ]:
from network.shortest_paths import NetworkDijkstraSingleDestination, NetworkBidirectionalDijkstra
from network.shortest_paths.contraction_hierarchies import (
    NetworkContractionHierarchiesRandom,
    NetworkContractionHierarchiesPeriodicED,
    NetworkContractionHierarchiesLazyED
)
In [ ]:
# Stop table restricted to location-related columns for the Zone map below.
stops_df = pd.DataFrame(
    stops[stop_id].to_dict()
    for stop_id in stops.ids
)
stops_df = stops_df[['StopId', 'Code', 'Name', 'Zone', 'Street', 'Lng', 'Lat']]
stops_df
Out[ ]:
StopId Code Name Zone Street Lng Lat
0 35 BX 01 Bến xe buýt Sài Gòn Quận 1 Lê Lai 106.689362 10.767676
1 7276 Q1 190 Tôn Thất Tùng Quận 1 Phạm Ngũ Lão 106.690941 10.767670
2 7277 Q1 191 Nguyễn Thị Nghĩa Quận 1 Phạm Ngũ Lão 106.693699 10.768788
3 7278 Q1 192 Trường Emst Thalmann Quận 1 Phạm Ngũ Lão 106.695587 10.769504
4 7265 Q1 179 Trạm Trung chuyển trên đường Hàm Nghi Quận 1 Hàm Nghi 106.700259 10.770911
... ... ... ... ... ... ... ...
4392 7682 SWB1_1 Ga tàu thuỷ Bạch Đằng Quận 1 Tôn Đức Thắng 106.706901 10.775264
4393 7683 SWB1_4 Ga tàu thuỷ Bình An Quận 2 Đường số 21 106.728734 10.797349
4394 7684 SWB1_8 Ga tàu thuỷ Thanh Đa Quận Bình Thạnh Cư xá Thanh Đa 106.715757 10.819385
4395 7685 SWB1_10 Ga tàu thuỷ Hiệp Bình Chánh Quận Thủ Đức Đường số 10 106.731475 10.827231
4396 7686 SWB1_11 Ga tàu thuỷ Linh Đông Quận Thủ Đức Đường số 36 106.746254 10.835287

4397 rows × 7 columns

In [ ]:
# Copy-paste fix: this plot colors by Zone, but the title used to say
# "StopType" (carried over from the earlier StopType map).
fig = px.scatter_geo(
    stops_df, lat='Lat', lon='Lng', color='Zone', width=1400, height=1000,
    hover_data=['StopId', 'Code', 'Name', 'Zone']
)
fig.update_layout(
    title='Spatial distribution of Zone',
    font=dict(
        size=17
    ),
    geo=dict(
        scope = 'asia',
        resolution = 110,
        lonaxis_range= [106.4, 106.9],
        lataxis_range= [10.6, 11],
        landcolor = 'rgb(240, 240, 240)',
    )
)
fig.show(renderer='notebook')
In [ ]:
# Baseline single-destination and bidirectional Dijkstra engines.
dijkstra_1 = NetworkDijkstraSingleDestination.from_net(net=net)
dijkstra_2 = NetworkBidirectionalDijkstra.from_net(net=net)

# Contraction-hierarchy variants, differing in node-ordering strategy.
ch_0 = NetworkContractionHierarchiesRandom.from_net(net=net)
ch_1 = NetworkContractionHierarchiesPeriodicED.from_net(net=net)
ch_2 = NetworkContractionHierarchiesLazyED.from_net(net=net)
100%|██████████| 4397/4397 [00:37<00:00, 118.49it/s] 
100%|██████████| 4397/4397 [00:41<00:00, 106.79it/s]
100%|██████████| 4397/4397 [00:00<00:00, 4936.87it/s]
100%|██████████| 4397/4397 [00:03<00:00, 1245.85it/s]
In [ ]:
# Hand-picked stop ids, apparently grouped by level; currently only
# referenced by the commented-out sampling code further below.
NODES_AT_LEVEL = [
    [2045, 455, 2342, 271],
    [3223, 117, 4263, 521],
    [962, 3901, 7167, 7213]
]
In [ ]:
# Shortcut counts and density per CH variant. The labels are written out
# here instead of slicing the global `names`, which is only defined in a
# LATER cell — relying on it broke Restart & Run All.
ch_labels = [
    'CH (random node order)',
    'CH (periodic ED update)',
    'CH (lazy ED update)',
]
pd.DataFrame(
    {
        label: {
            'Shortcuts': engine.no_shortcuts,
            'Density': engine.no_shortcuts / len(net.nodes)
        }
        for label, engine in zip(ch_labels, [ch_0, ch_1, ch_2])
    }
).round(2)
Out[ ]:
CH (random node order) CH (periodic ED update) CH (lazy ED update)
Shortcuts 54210.00 13567.00 7447.00
Density 12.33 3.09 1.69
In [ ]:
def analyse_pair(src: int, dest: int):
    """Run every shortest-path engine on (src, dest).

    Returns a list of (distance, engine) pairs in the fixed order:
    Dijkstra, bidirectional Dijkstra, then the three CH variants.
    """
    engines = [dijkstra_1, dijkstra_2, ch_0, ch_1, ch_2]
    results = []
    for engine in engines:
        dist, _ = engine.path(src, dest)
        results.append((dist, engine))
    return results

# Display names, index-aligned with the engine order used by analyse_pair().
names = [
    'Dijkstra',
    'Bidirectional Dijkstra',
    'CH (random node order)',
    'CH (periodic ED update)',
    'CH (lazy ED update)',
]

def analyse_pair_length(src: int, dest: int):
    """Return a result row of search-space sizes for one (src, dest) pair.

    'Dist' is taken from the plain Dijkstra run; the remaining keys map
    each engine's display name to the size of its explored search space.
    """
    analysis = analyse_pair(src, dest)
    row = {
        'Src': src,
        'Dest': dest,
        'Dist': analysis[0][0],
    }
    for name, (_, engine) in zip(names, analysis):
        row[name] = len(engine.search_space)
    return row

analyse_pair_length(2045, 455)
Out[ ]:
{'Src': 2045,
 'Dest': 455,
 'Dist': 33.23761501573184,
 'Dijkstra': 589,
 'Bidirectional Dijkstra': 316,
 'CH (random node order)': 398,
 'CH (periodic ED update)': 165,
 'CH (lazy ED update)': 70}
In [ ]:
# Search-space sizes for 3000 random stop pairs, sorted by true distance.
# analyse_pair_length() already reports Src/Dest, so the duplicated keys
# (and the redundant dict(...) wrapper) that used to be spliced around it
# are gone; reset_index(drop=True) replaces the reset/drop round-trip.
# NOTE: no random seed is set, so the sample differs between runs.
pairs = random.sample(list(iters.combinations(stops.ids, 2)), k=3000)
df = pd.DataFrame([
    analyse_pair_length(x, y)
    for x, y in tqdm(pairs)
]).sort_values(by='Dist').reset_index(drop=True)
df
df
Out[ ]:
Src Dest Dist Dijkstra Bidirectional Dijkstra CH (random node order) CH (periodic ED update) CH (lazy ED update)
0 2150 3020 1.027989 5 5 24 7 10
1 2584 2595 1.877346 9 8 71 32 23
2 1298 2712 2.301905 5 5 72 5 6
3 4294 2717 3.095989 5 7 138 10 30
4 7265 120 3.166735 43 44 264 44 37
... ... ... ... ... ... ... ... ...
2995 1094 7511 inf 4361 4364 428 150 88
2996 3569 7495 inf 4361 4365 207 117 30
2997 859 7489 inf 4361 4366 362 66 4
2998 2558 7492 inf 4361 4365 272 121 64
2999 1170 7489 inf 4361 4366 59 98 55

3000 rows × 8 columns

In [ ]:
# Drop unreachable pairs: disconnected stops report an infinite distance.
df = df[df['Dist'] != float('inf')]
In [ ]:
# Summary of search-space sizes across all reachable pairs.
df.describe().drop(columns=['Src', 'Dest']).round(2)
Out[ ]:
Dist Dijkstra Bidirectional Dijkstra CH (random node order) CH (periodic ED update) CH (lazy ED update)
count 2946.00 2946.00 2946.00 2946.00 2946.00 2946.00
mean 72.26 2481.48 611.56 278.29 125.60 68.76
std 43.65 1233.94 380.82 93.87 55.22 20.24
min 1.03 5.00 5.00 9.00 5.00 6.00
25% 42.08 1470.50 290.25 214.00 86.00 56.00
50% 61.21 2642.50 570.50 296.00 118.00 71.00
75% 92.35 3546.00 887.00 349.00 155.00 84.00
max 310.85 4361.00 1669.00 490.00 346.00 126.00
In [ ]:
# Sanity check: all five engines should agree on the distance for one pair.
analyse_pair(35, 345)
Out[ ]:
[(45.836360352544396,
  <network.shortest_paths.dijkstra.NetworkDijkstraSingleDestination at 0x1a2f4355490>),
 (45.83636035254439,
  <network.shortest_paths.bidirectional_dijkstra.NetworkBidirectionalDijkstra at 0x1a2f2d52f90>),
 (45.83636035254439,
  <network.shortest_paths.contraction_hierarchies.random.NetworkContractionHierarchiesRandom at 0x1a2f37aba90>),
 (45.836360352544396,
  <network.shortest_paths.contraction_hierarchies.periodic_ED.NetworkContractionHierarchiesPeriodicED at 0x1a2f3793bd0>),
 (45.83636035254439,
  <network.shortest_paths.contraction_hierarchies.lazy_ED.NetworkContractionHierarchiesLazyED at 0x1a2f68cd610>)]
In [ ]:
# Distribution of the true shortest-path distances over the sampled pairs.
fig = ff.create_distplot(
    [df['Dist']],
    group_labels=['Dist'],
    bin_size=5,
    show_rug=False,
)

fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Count")

fig.update_layout(
    font=dict(
        size=18,
    ),
    title_text="Actual Distance distribution of generated pairs"
)
fig.show(renderer='notebook')
In [ ]:
# Search-space size vs. true distance for the two Dijkstra variants.
fig = px.scatter(
    df,
    x='Dist',
    y=['Dijkstra', 'Bidirectional Dijkstra'],
    width=1400,
    height=700,
    opacity=0.10,
    trendline="lowess",
    trendline_options=dict(frac=0.1),
    color_discrete_sequence=px.colors.qualitative.T10[:],
    marginal_y="box",
)

fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")

fig.update_layout(
    font=dict(
        size=18,
    ),
    title="Performance comparison: Dijkstra vs. Bidirectional Dijkstra"
)
fig.show(renderer='notebook')
In [ ]:
# Search-space size vs. true distance across the three CH variants.
fig = px.scatter(
    df,
    x='Dist',
    y=[
        'CH (random node order)',
        'CH (periodic ED update)',
        'CH (lazy ED update)'
    ],
    width=1400,
    height=700,
    opacity=0.10,
    trendline="lowess",
    trendline_options=dict(frac=0.1),
    color_discrete_sequence=px.colors.qualitative.T10[2:],
    marginal_y="box",
)

fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")

fig.update_layout(
    font=dict(
        size=18,
    ),
    title="Performance comparison: Contraction Hierarchies variants"
)
fig.show(renderer='notebook')
In [ ]:
# Cross-family comparison on a log scale. Columns are referenced by name
# (consistent with the two cells above) instead of passing raw Series
# alongside the DataFrame.
fig = px.scatter(
    df,
    width=1400,
    height=700,
    x='Dist',
    y=[
        'Bidirectional Dijkstra',
        'CH (random node order)',
        'CH (lazy ED update)'
    ],
    log_y=True,
    opacity=0.10,
    trendline="lowess",
    trendline_options=dict(frac=0.1),
    color_discrete_sequence=[
        px.colors.qualitative.T10[x]
        for x in [1, 2, 4]
    ],
    marginal_y="box"
)

fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")

fig.update_layout(
    font=dict(
        size=18,
    ),
    title="Performance comparison: Bidirectional Dijkstra vs. Contraction Hierarchies"
)
fig.show(renderer='notebook')